### import datadf_raw<-read.csv(file=file.path(path,'data','raw_data','outcome.csv'))#### Clean data source("preprocess.R")df<-preprocess_data(df_raw)# items to use items_to_use<-'MADRS.S'# options are #MADRS.S, LSAS.SR, PDSS.SRitemlabels<-itemlabels%>%as_tibble()%>%filter(str_detect(itemnr, items_to_use))# DIF+aux variables to use # DIF not used 'marital_status','children', 'working'dif_variables<-c('Patient','Treatment','sex','age','TreatmentAccessStart','education')### Make a backup of the dataframe, in case you need to revert changes at some pointdf.all<-dfprint(itemlabels)
##### Before filtering out participants, you should check the missing data structure using RImissing() and RImissingP()# If you want to include participants with missing data, input the minimum number of items responses that a participant should have to be included in the analysis:min.responses<-4# In our case if they have missing on one item all items for that time are missing. # Select the variables we will work with, and filter out respondents with missing datadf<-df%>%select(all_of(c(dif_variables,"Time",itemlabels$itemnr)))%>%# vfilter(rowSums(is.na(select(.,all_of(itemlabels$itemnr))))<min.responses)
4 Create DIF variables
Code
#---- Create DIF variables----# DIF variables into vectors, recoded as factors since DIF functions need this# these could also be stored in its own dataframe (not a tibble) instead of as vectors# Named vector for the new typestype_transform<-c(Treatment ="factor", sex ="factor", age ="numeric", TreatmentAccessStart ="integer",education="factor",Time='factor')# Transform columns based on the named vectordif<-df%>%mutate(across(names(type_transform), ~switch(type_transform[which(names(type_transform)==cur_column())], "integer"=as.integer(.), "character"=as.character(.),"factor"=as.factor(.), "numeric"=as.numeric(.))))%>%as.data.frame(.)%>%select(!all_of(itemlabels$itemnr))# then remove them from dataframe, since we need a dataframe with only item data for the Rasch analysesdf<-df%>%select(all_of(c("Time",itemlabels$itemnr)))# add time here ? dfnotime<-df%>%select(!Time)source("RISE_theme.R")
5 Demographics descriptives
Code
dif_spec<-dif%>%filter(Time=='SCREEN')%>%select(!all_of(c("Time","Patient")))summary(dif_spec)# RIdemographics(dif_spec,'Treatment') <- function crashes TODO make own
Treatment sex age TreatmentAccessStart education
MDD:2987 F:4005 Min. :18.00 Min. :2008 Primary : 504
PD :1719 M:2456 1st Qu.:27.00 1st Qu.:2012 Secondary :3152
SAD:1755 Median :33.00 Median :2014 Postsecondary:2805
Mean :35.38 Mean :2014
3rd Qu.:42.00 3rd Qu.:2017
Max. :84.00 Max. :2019
6 Overall number of responses
Code
# Collapsed RIallresp(dfnotime)# Seperate RIallresp_over_times<-df%>%# split(.$Time)%>%# split the data map(~RIallresp(.x%>%dplyr::select(!Time)))#+ labs(title = .x$Time)) # create separate for each time# make nice later RI_allresp_kable_grid=combine_kables_grid(RIallresp_over_times,cols=3)RI_allresp_kable_grid
Response category
Number of responses
Percent
0
147379
24.9
1
148593
25.1
2
152831
25.8
3
67964
11.5
4
61598
10.4
5
10877
1.8
6
2301
0.4
x
SCREEN.Response category
SCREEN.Number of responses
SCREEN.Percent
PRE.Response category
PRE.Number of responses
PRE.Percent
WEEK01.Response category
WEEK01.Number of responses
WEEK01.Percent
0
8232
14.2
0
9700
17.3
0
9626
20.1
1
7983
13.7
1
9965
17.8
1
10868
22.7
2
15050
25.9
2
15616
27.9
2
13754
28.8
3
10126
17.4
3
9146
16.3
3
6674
14.0
4
13231
22.8
4
9629
17.2
4
5814
12.2
5
2816
4.8
5
1608
2.9
5
935
2.0
6
711
1.2
6
307
0.5
6
155
0.3
WEEK02.Response category
WEEK02.Number of responses
WEEK02.Percent
WEEK03.Response category
WEEK03.Number of responses
WEEK03.Percent
WEEK04.Response category
WEEK04.Number of responses
WEEK04.Percent
0
10426
21.2
0
10968
22.7
0
11626
24.7
1
11747
23.9
1
12365
25.6
1
12516
26.6
2
14076
28.6
2
13422
27.8
2
12690
26.9
3
6367
12.9
3
5755
11.9
3
5254
11.2
4
5502
11.2
4
4697
9.7
4
4155
8.8
5
884
1.8
5
856
1.8
5
722
1.5
6
183
0.4
6
150
0.3
6
134
0.3
WEEK05.Response category
WEEK05.Number of responses
WEEK05.Percent
WEEK06.Response category
WEEK06.Number of responses
WEEK06.Percent
WEEK07.Response category
WEEK07.Number of responses
WEEK07.Percent
0
11754
26.2
0
11745
27.3
0
11877
28.9
1
12299
27.4
1
12301
28.6
1
11965
29.1
2
11657
26.0
2
10933
25.4
2
10044
24.5
3
4732
10.6
3
4201
9.8
3
3801
9.3
4
3639
8.1
4
3242
7.5
4
2804
6.8
5
639
1.4
5
525
1.2
5
460
1.1
6
100
0.2
6
136
0.3
6
98
0.2
WEEK08.Response category
WEEK08.Number of responses
WEEK08.Percent
WEEK09.Response category
WEEK09.Number of responses
WEEK09.Percent
WEEK10.Response category
WEEK10.Number of responses
WEEK10.Percent
0
12015
30.6
0
11977
32.1
0
11951
34.0
1
11516
29.3
1
11197
30.0
1
10795
30.7
2
9632
24.5
2
8719
23.4
2
7755
22.0
3
3218
8.2
3
2913
7.8
3
2580
7.3
4
2431
6.2
4
2113
5.7
4
1775
5.0
5
408
1.0
5
335
0.9
5
272
0.8
6
92
0.2
6
69
0.2
6
71
0.2
POST.Response category
POST.Number of responses
POST.Percent
" "
" "
0
15482
34.9
1
13076
29.5
2
9483
21.4
3
3197
7.2
4
2566
5.8
5
417
0.9
6
95
0.2
7 Descriptives of raw data
Response distribution for all items are summarized below.
#df_test = df %>% filter(Time=='SCREEN') %>% select(!Time)# Across time # seperated time - might need to fix this remake in ggplot?png(filename=paste0("./plots/",itemStart,"_rawdist_over_time.png"),width=800,height=2400)par(mfrow=c(7,2),mar =c(4, 4, 4, 2))# c(5 <- bottom lines, 4, 4, 2)RIrawdist_over_times<-df%>%# split(.$Time)%>%# split the data imap(~{RIrawdist(.x%>%dplyr::select(!Time))mtext(paste("Time:", .y), side =1, line =3, adj =0, cex =0.8)})dev.off()
The eRm package, which uses Conditional Maximum Likelihood (CML) estimation, will be used primarily. For this analysis, the Partial Credit Model will be used.
Due to the amount of data across timepoints targeting will first be inspected, and the best fitting timepoint will be chosen based on targeting, which in turn will inform the rest of the analyses.
8.1.1 Targeting
Code
# increase fig-height above as needed, if you have many items og. value was 5. #RItargeting(dfnotime)require(patchwork)targeting_time_plots<-ggplots_with_spec_func_over_time(df,func_call =RItargeting,title_all='', blankx=FALSE,colsplot =3,rowsplot=5,return_list_plots=TRUE)good_layout_targeting_plots<-wrap_plots(targeting_time_plots, ncol =3)+plot_layout(guides="collect")#print(good_layout_targeting_plots)ggsave( filename =paste0("./plots/",items_to_use,"_targeting_over_time.png"), plot =good_layout_targeting_plots, width =20, # Increase width (in inches) height =45, # Increase height (in inches) dpi =300# High resolution)print(good_layout_targeting_plots)
Item Var gamma se pvalue padj.BH sig lower upper
1 MADRS.S_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 MADRS.S_2 grouping_based_on_score NaN NaN NA NA ? NA NA
3 MADRS.S_3 grouping_based_on_score NaN NaN NA NA ? NA NA
4 MADRS.S_4 grouping_based_on_score NaN NaN NA NA ? NA NA
5 MADRS.S_5 grouping_based_on_score NaN NaN NA NA ? NA NA
6 MADRS.S_6 grouping_based_on_score NaN NaN NA NA ? NA NA
7 MADRS.S_7 grouping_based_on_score NaN NaN NA NA ? NA NA
8 MADRS.S_8 grouping_based_on_score NaN NaN NA NA ? NA NA
9 MADRS.S_9 grouping_based_on_score NaN NaN NA NA ? NA NA
Item Var gamma se pvalue padj.BH sig lower upper
1 MADRS.S_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 MADRS.S_2 grouping_based_on_score NaN NaN NA NA ? NA NA
3 MADRS.S_4 grouping_based_on_score NaN NaN NA NA ? NA NA
4 MADRS.S_5 grouping_based_on_score NaN NaN NA NA ? NA NA
5 MADRS.S_6 grouping_based_on_score NaN NaN NA NA ? NA NA
6 MADRS.S_7 grouping_based_on_score NaN NaN NA NA ? NA NA
7 MADRS.S_8 grouping_based_on_score NaN NaN NA NA ? NA NA
8 MADRS.S_9 grouping_based_on_score NaN NaN NA NA ? NA NA
Item Var gamma se pvalue padj.BH sig lower upper
1 MADRS.S_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 MADRS.S_2 grouping_based_on_score NaN NaN NA NA ? NA NA
3 MADRS.S_5 grouping_based_on_score NaN NaN NA NA ? NA NA
4 MADRS.S_6 grouping_based_on_score NaN NaN NA NA ? NA NA
5 MADRS.S_7 grouping_based_on_score NaN NaN NA NA ? NA NA
6 MADRS.S_8 grouping_based_on_score NaN NaN NA NA ? NA NA
7 MADRS.S_9 grouping_based_on_score NaN NaN NA NA ? NA NA
Item Var gamma se pvalue padj.BH sig lower upper
1 MADRS.S_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 MADRS.S_5 grouping_based_on_score NaN NaN NA NA ? NA NA
3 MADRS.S_6 grouping_based_on_score NaN NaN NA NA ? NA NA
4 MADRS.S_7 grouping_based_on_score NaN NaN NA NA ? NA NA
5 MADRS.S_8 grouping_based_on_score NaN NaN NA NA ? NA NA
6 MADRS.S_9 grouping_based_on_score NaN NaN NA NA ? NA NA
Item Var gamma se pvalue padj.BH sig lower upper
1 MADRS.S_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 MADRS.S_5 grouping_based_on_score NaN NaN NA NA ? NA NA
3 MADRS.S_6 grouping_based_on_score NaN NaN NA NA ? NA NA
4 MADRS.S_7 grouping_based_on_score NaN NaN NA NA ? NA NA
5 MADRS.S_9 grouping_based_on_score NaN NaN NA NA ? NA NA
5 and 6 seems to load to something else and 5 has the worst fit (infit outfit). Also the partial gamma in relation to 5 and 6. While 7 underfits in the boostrap, the infit/outfit is not as deviant as 5. Removing 5.
Item Var gamma se pvalue padj.BH sig lower upper
1 MADRS.S_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 MADRS.S_6 grouping_based_on_score NaN NaN NA NA ? NA NA
3 MADRS.S_7 grouping_based_on_score NaN NaN NA NA ? NA NA
4 MADRS.S_9 grouping_based_on_score NaN NaN NA NA ? NA NA
Item Var gamma se pvalue padj.BH sig lower upper
1 MADRS.S_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 MADRS.S_7 grouping_based_on_score NaN NaN NA NA ? NA NA
3 MADRS.S_9 grouping_based_on_score NaN NaN NA NA ? NA NA
Item Var gamma se pvalue padj.BH sig lower upper
1 MADRS.S_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 MADRS.S_6 grouping_based_on_score NaN NaN NA NA ? NA NA
3 MADRS.S_9 grouping_based_on_score NaN NaN NA NA ? NA NA
pfit_u3poly<-PerFit::U3poly(matrix =df_r3, Ncat =7, # make sure to input number of response categories, not thresholds IRT.PModel ="PCM")misfits<-PerFit::flagged.resp(pfit_u3poly)%>%pluck("Scores")%>%as.data.frame()%>%pull(FlaggedID)misfits2<-RIpfit(df_r3, output ="rowid")
The ordinal sum to interval score contains the information to transform into the below thetas, but we plot them here for convinience (based on 3 items)
Code
est_thetas2<-RIestThetas(df_r3, method ="WLE")hist(est_thetas2$WLE, col ="#009ca6", main ="Histogram of person locations (thetas)", breaks =17)